Load Libraries

# install.packages('knitr', repos = c('http://rforge.net', 'http://cran.rstudio.org'), type = 'source')
# install.packages("ggplot2")
# install.packages("gganimate")
# install.packages("gifski")
# install.packages("png")
# install.packages("transformr")
library("knitr")
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 3.4.4
library("gganimate")
## Warning: package 'gganimate' was built under R version 3.4.4
library("gifski")
## Warning: package 'gifski' was built under R version 3.4.4
library("png")
## Warning: package 'png' was built under R version 3.4.4
library("transformr")
## Warning: package 'transformr' was built under R version 3.4.4

Dancing Landscape

Sources of Dancing Landscapes

  • Complex behavior
    • When factors that create landscapes are interdependent
    • When factors that create landscapes change over time

Examples of Dancing Landscapes

  • The stock market
  • Customer demand by product tag over time

Demonstration

f2D <- function(x, t) {
  return (cos(x) * sin(t * pi) - (abs(x)/10) + x/20)
}

par(mfrow=c(3, 3))
plot(f2D(seq(-10, 10, 0.1), 0.00), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.17), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.33), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.50), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.67), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.83), type="l")
plot(f2D(seq(-10, 10, 0.1), 1.00), type="l")
plot(f2D(seq(-10, 10, 0.1), 1.17), type="l")
plot(f2D(seq(-10, 10, 0.1), 1.33), type="l")

par(mfrow=c(1, 1))

Assemble Sample dataframe

constructData2D <- function (func, xMin, xMax, precision, timeMin, timeMax)
{
  # Interval is bounded by gganimate - this just works, do not ask questions
  timeInterval = ((timeMax - timeMin) / 50) + 0.001
  
  data <- data.frame(X=0, Y=0, Time=0)
  i = 1
  for (t in seq(timeMin, timeMax, timeInterval))
  {
    for (x in seq(xMin, xMax, precision))
    {
      data[i,]$X = x
      data[i,]$Y = func(x, t)
      data[i,]$Time = t
      i = i + 1
    }
  }
  
  return (data)
}

timeData = constructData2D(f2D, -10, 10, 0.2, 0, 5)

## Output the dataframe and the graph it represents
str(timeData)
## 'data.frame':    5050 obs. of  3 variables:
##  $ X   : num  -10 -9.8 -9.6 -9.4 -9.2 -9 -8.8 -8.6 -8.4 -8.2 ...
##  $ Y   : num  -1.5 -1.47 -1.44 -1.41 -1.38 -1.35 -1.32 -1.29 -1.26 -1.23 ...
##  $ Time: num  0 0 0 0 0 0 0 0 0 0 ...
ggplot(timeData, aes(X, Y, group=Time)) +
  geom_path()

Animation of function over time

visualizeFunction <- function(dataframe)
{
  if (is.null(dataframe$OptimalX))
  {
    ggplot(dataframe, aes(X, Y, group=Time)) +
      geom_path() +
      transition_states(Time, transition_length=1, state_length=1, wrap=F) +
      labs(title = "Time: {closest_state}") +
      enter_fade() + 
      exit_fade()
  }
  else
  {
    ggplot(dataframe, aes(X, Y, group=Time)) +
      geom_path() +
      geom_point(x=dataframe$OptimalX, y=dataframe$OptimalY, colour="hotpink", size=4) +
      transition_states(Time, transition_length=1, state_length=1, wrap=F) +
      labs(title = "Time: {closest_state}") +
      enter_fade() + 
      exit_fade()
  }
}

visualizeFunction(timeData)

Setup Hill Climbing

The function updates the OptimalX and OptimalY columns of a dataframe containing X, Y, and Time to include the best results from hill climbing optimization function. It attempts to find the maxima.

ticksPerUnitTime controls how fast the hill climbing runs compared to the function. For every increase in the time value of 1.0, the hill climbing will update its value ticksPerUnitTime times.

costFunction is the function being optimized, of the form y = costFunction(x, time).

At time = 0, the algorithm will use a single random guess. That is, do not expect the first value to be a good one. Setting startX can control this instead of a random value.

windowSize controls how far the hill climb algorithm can guess for each guess. It will look up to (windowSize / 2) left or right from its current best

hillClimb <- function(timeDataFrame, ticksPerUnitTime, costFunction, startX = NULL, windowSize = 1)
{
  # Setup variables
  currentTime = min(timeDataFrame$Time)
  currentTick = 1
  currentSubset = timeDataFrame[timeDataFrame$Time == currentTime,]
  bestX = startX
  if (is.null(bestX))
  {
    bestX = runif(1, min(currentSubset$X), max(currentSubset$X))
  }
  bestY = costFunction(bestX, currentTime)
  width = windowSize / 2

  # Record optimal x and y
  timeDataFrame[timeDataFrame$Time == currentTime,]$OptimalX = rep(bestX, nrow(currentSubset))
  timeDataFrame[timeDataFrame$Time == currentTime,]$OptimalY = rep(bestY, nrow(currentSubset))

  # Iterate through time
  nextSubset = timeDataFrame[timeDataFrame$Time > currentTime,]
  while (nrow(nextSubset) > 0)
  {
    # Calculate number of hill climb ticks (guesses) to do
    nextTime = min(nextSubset$Time)
    iterations = floor(nextTime * ticksPerUnitTime) - currentTick
    
    # Update variables
    currentTime = nextTime
    currentSubset = timeDataFrame[timeDataFrame$Time == currentTime,]
    getOption <- function() { 
      return (runif(1, 
                    max(bestX - width, min(currentSubset$X)), 
                    min(bestX + width, max(currentSubset$X))))
    }
    bestY = costFunction(bestX, currentTime)

    # Actually do hill climbing
    if (iterations >= 1)
    for (i in 1:iterations)
    {
      currentTick = currentTick + 1
      testX = getOption()
      testY = costFunction(testX, currentTime)
      if (testY > bestY)
      {
        bestX = testX
        bestY = testY
      }
    }
    
    # Record new optimal x and y
    timeDataFrame$OptimalX[timeDataFrame$Time == currentTime] = rep(bestX, nrow(currentSubset))
    timeDataFrame$OptimalY[timeDataFrame$Time == currentTime] = rep(bestY, nrow(currentSubset))
    
    # Load next time subset
    nextSubset = timeDataFrame[timeDataFrame$Time > currentTime,]
  }
  
  return (timeDataFrame)
}

Run and Visualize Hill Climbing

# Approximately 1 guess per tick
timeData = hillClimb(timeData, 9, f2D, startX = -10)
## Warning in `[<-.data.frame`(`*tmp*`, timeDataFrame$Time == currentTime, :
## provided 4 variables to replace 3 variables

## Warning in `[<-.data.frame`(`*tmp*`, timeDataFrame$Time == currentTime, :
## provided 4 variables to replace 3 variables
visualizeFunction(timeData)
## Warning: Removed 101 rows containing missing values (geom_point).
## Warning: Removed 101 rows containing missing values (geom_point).

## Warning: Removed 101 rows containing missing values (geom_point).
# Approximately 3 guesses per tick
timeData = hillClimb(timeData, 28, f2D, startX = -10)
visualizeFunction(timeData)

# Approximately 0.3 guesses per tick
timeData = hillClimb(timeData, 3, f2D, startX = -10)
visualizeFunction(timeData)

Takeaways

NetLogo in R

This next section sets up an agent-based model that exhibits complexity. We will use it to for the following workshop, where you will attempt to optimize over the output from the model.

The model is a square grid of cells, wrapping on both axes, with one agent per cell. Each agent has a value that it can adjust up or down. An agent wants to have a higher value than three of its neighbors, but does not want to be the highest. Agents will:

Setup Utility Functions

# From https://stackoverflow.com/questions/2453326/fastest-way-to-find-second-third-highest-lowest-value-in-vector-or-column
maxN <- function(x, N=2){
  len <- length(x)
  if(N>len){
    warning('N greater than length(x).  Setting N=length(x)')
    N <- length(x)
  }
  sort(x,partial=len-N+1)[len-N+1]
}
# End sourced code

getAgentChange <- function(agentValue, neighborValues)
{
  neighborValues = c(neighborValues, agentValue)
  if (agentValue >= max(neighborValues))
    return (-3)
  if (agentValue < mean(neighborValues))
    return (1)
  if (agentValue >= mean(neighborValues) && agentValue < maxN(neighborValues, 2))
    return (1)
  return (0)
}

Setup grid

buildWorld <- function(height, width, startingValues = NULL)
{
  if (is.null(startingValues))
  {
    startingValues = rep(100, height * width)
  }
  densityValue = 1 / (height * width)
  sumStartValues = sum(startingValues)
  world <- data.frame(X=0, Y=0, Value=startingValues[1], Density = densityValue)
  i = 1
  for (x in 1:width)
  {
    for (y in 1:height)
    {
      world[i,]$X = x
      world[i,]$Y = y
      world[i,]$Value = startingValues[i]
      world[i,]$Density = startingValues[i] / sumStartValues
      i = i + 1
    }
  }
  
  return (world)
}

world = buildWorld(17, 17, seq(100, 128.9, 0.1))

ggplot(world, aes(X, Y, z = Density)) +
  geom_raster(aes(fill = Density)) +
  geom_contour(colour = "white")

Setup Changing Values

updateWorld <- function(world, updateFunction, orthogonalOnly = T)
{
  newWorld = buildWorld(max(world$X), max(world$Y))
  for (x in 1:max(world$X))
  {
    xLower = x - 1
    if (xLower == 0) xLower = max(world$X)
    xUpper = x + 1
    if (xUpper > max(world$X)) xUpper = 1
    
    for (y in 1:max(world$Y))
    {
      yLower = y - 1
      if (yLower == 0) yLower = max(world$Y)
      yUpper = y + 1
      if (yUpper > max(world$Y)) yUpper = 1
      
      neighborValues = c()
      neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == y,]$Value)
      neighborValues = c(neighborValues, world[world$X == xLower & world$Y == y,]$Value)
      neighborValues = c(neighborValues, world[world$X == x & world$Y == yUpper,]$Value)
      neighborValues = c(neighborValues, world[world$X == x & world$Y == yLower,]$Value)

      
      if (!orthogonalOnly)
      {
        neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == yUpper,]$Value)
        neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == yLower,]$Value)
        neighborValues = c(neighborValues, world[world$X == xLower & world$Y == yUpper,]$Value)
        neighborValues = c(neighborValues, world[world$X == xLower & world$Y == yLower,]$Value)
      }

      currentValue = world[newWorld$X == x & newWorld$Y == y,]$Value
      newWorld[newWorld$X == x & newWorld$Y == y,]$Value = currentValue + updateFunction(currentValue, neighborValues)
    }
  }
  
  newWorld$Density = newWorld$Value / sum(newWorld$Value)
  
  return (newWorld)
}

world2 = updateWorld(world, getAgentChange)
ggplot(world2, aes(X, Y, z = Density)) +
  geom_raster(aes(fill = Density)) +
  geom_contour(colour = "white")

Construct time data

assembleData <- function (width, height, optimizeFunction = NULL, ticksPerTime = 1, addOptimalPoints = T, ...)
{
  lastStep = buildWorld(width, height, seq(100, 100+(height * width * 0.1), 0.1))
  lastStep$Time = rep(1, nrow(lastStep))
  finalData = NULL
  recordSteps = 25
  totalSteps = 50
  startRecording = totalSteps - recordSteps
  xGuess = floor(width / 2)
  yGuess = floor(height / 2)
  
  for (t in 2:totalSteps)
  {
    newData = updateWorld(lastStep, getAgentChange)
    newData$Time = rep(t, nrow(newData))
    if (addOptimalPoints)
    {
      bestX = newData[newData$Value == max(newData$Value),]$X[1]
      bestY = newData[newData$Value == max(newData$Value),]$Y[1]
      newData$OptimalX = rep(bestX, nrow(newData))
      newData$OptimalY = rep(bestY, nrow(newData))
    }
    if (!is.null(optimizeFunction))
    {
      guesses = optimizeFunction(newData, ticksPerTime, xGuess, yGuess, ...)
      xGuess = guesses[1]
      yGuess = guesses[2]
      newData$GuessedX = rep(xGuess, nrow(newData))
      newData$GuessedY = rep(yGuess, nrow(newData))
    }
    lastStep = newData
    
    if (t > startRecording && t < startRecording + recordSteps)
    {
      if(is.null(finalData))
        finalData = newData
      else
        finalData = rbind(finalData, newData)
    }
  }

  return(finalData)
}

finalData = assembleData(17, 17)

Prepare animated visualizer

visualizeFunction <- function(dataframe)
{
  animation = ggplot(dataframe, aes(X, Y, z = Density)) +
    geom_raster(aes(fill = Density)) +
    geom_contour(colour = "white", bins=7) + 
    transition_states(Time, transition_length = 10, state_length = 0, wrap=F) + 
    labs(title = "Time: {closest_state}") + 
    enter_fade() + 
    exit_fade() + 
    ease_aes("linear")
  if (!is.null(dataframe$OptimalX))
  {
    animation = animation + geom_point(x=dataframe$OptimalX, y=dataframe$OptimalY, colour="hotpink", size=4)
  }
  if (!is.null(dataframe$GuessedX))
  {
    animation = animation + geom_point(x=dataframe$GuessedX, y=dataframe$GuessedY, colour="lawngreen", size=4)
  }
  animation
}

Visualize

visualizeFunction(finalData)

Setup Optimization Library

All functions have a standardized signature:

Evaluation

evaluate <- function (results)
{
  averageDistance = 0
  for (t in min(results$Time):max(results$Time))
  {
    row = results[results$Time == t,][1,]
    distance = sqrt((row$OptimalX - row$GuessedX) ^ 2 + (row$OptimalY - row$GuessedY) ^ 2)
    averageDistance = averageDistance + distance
  }
  averageDistance = averageDistance / length(unique(results$Time))
  return (averageDistance)
}

Hill Climbing

hillClimb <- function(currentData, ticks, startX = 1, startY = 1)
{
  bestX = startX
  bestY = startY
  bestValue = currentData[currentData$X == bestX & currentData$Y == bestY,]$Value
  for (i in 1:ticks)
  {
    xLower = bestX - 1
    if (xLower == 0) xLower = max(currentData$X)
    xUpper = bestX + 1
    if (xUpper > max(currentData$X)) xUpper = 1
    yLower = bestY - 1
    if (yLower == 0) yLower = max(currentData$Y)
    yUpper = bestY + 1
    if (yUpper > max(currentData$Y)) yUpper = 1
    
    if (currentData[currentData$X == xLower & currentData$Y == bestY,]$Value > bestValue)
    {
      bestX = xLower
      bestValue = currentData[currentData$X == xLower & currentData$Y == bestY,]$Value
    }
    
    if (currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value > bestValue)
    {
      bestX = xUpper
      bestValue = currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value
    }
    
    if (currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value > bestValue)
    {
      bestY = yUpper
      bestValue = currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value
    }
    
    if (currentData[currentData$X == bestX & currentData$Y == yLower,]$Value > bestValue)
    {
      bestY = yLower
      bestValue = currentData[currentData$X == bestX & currentData$Y == yLower,]$Value
    }
  }
  
  return (c(bestX, bestY))
}

Simulated Annealing

Accepts two additional parameters:

  • coolingFunction - function that returns a percent likelihood to swap to a worse position.
  • temperatureTickAmount - each time the cooling function is called, its input increments by this amount. Determines how fast to slide along the cooling function.
linearCoolingFunction <- function (x) { return(1 - 0.1 * x) }
powerCoolingFunction <- function (x) { return(0.75 ^ x) }
decayCoolingFunction <- function (x) { return(1 / (x + 1)) }

simulatedAnnealing <- function(currentData, ticks, startX = 1, startY = 1, coolingFunction = NULL, temperatureTickAmount = NULL)
{
  if (is.null(coolingFunction))
  {
    stop("Simulated Annealing requires CoolingFunction!")
  }
  if (is.null(temperatureTickAmount))
  {
    stop("Simulated Annealing requires TemperatureTickAmount!")
  }
  
  bestX = startX
  bestY = startY
  bestValue = currentData[currentData$X == bestX & currentData$Y == bestY,]$Value
  currentCoolingValue = 0
  for (i in 1:ticks)
  {
    xLower = bestX - 1
    if (xLower == 0) xLower = max(currentData$X)
    xUpper = bestX + 1
    if (xUpper > max(currentData$X)) xUpper = 1
    yLower = bestY - 1
    if (yLower == 0) yLower = max(currentData$Y)
    yUpper = bestY + 1
    if (yUpper > max(currentData$Y)) yUpper = 1
    
    nextX = bestX
    nextY = bestY
    nextValue = 0
    
    if (currentData[currentData$X == xLower & currentData$Y == bestY,]$Value > nextValue)
    {
      nextX = xLower
      nextValue = currentData[currentData$X == xLower & currentData$Y == bestY,]$Value
    }
    
    if (currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value > nextValue)
    {
      nextX = xUpper
      nextValue = currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value
    }
    
    if (currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value > nextValue)
    {
      nextY = yUpper
      nextValue = currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value
    }
    
    if (currentData[currentData$X == bestX & currentData$Y == yLower,]$Value > nextValue)
    {
      nextY = yLower
      nextValue = currentData[currentData$X == bestX & currentData$Y == yLower,]$Value
    }
    
    # The part that makes this simulated annealing, not hill climbing
    currentCoolingValue = currentCoolingValue + temperatureTickAmount
    if ((nextValue < bestValue && runif(1) < coolingFunction(currentCoolingValue)) ||
        nextValue > bestValue)
    {
      bestValue = nextValue
      bestX = nextX
      bestY = nextY
    }
  }
  
  return (c(bestX, bestY))
}

Workshop

hillClimbOutput = assembleData(17, 17, hillClimb, ticksPerTime = 100)
cat("Hill climbing was, on average, off by", evaluate(hillClimbOutput), "units.\n")
## Hill climbing was, on average, off by 6.125965 units.
visualizeFunction(hillClimbOutput)

saOutput = assembleData(17, 17, simulatedAnnealing, ticksPerTime = 10, coolingFunction = decayCoolingFunction, temperatureTickAmount = 0.75)
cat("Simulated Annealing was, on average, off by", evaluate(saOutput), "units.\n")
## Simulated Annealing was, on average, off by 10.50083 units.
visualizeFunction(saOutput)